require(tidyverse)

rm(list = ls())
gc()

################################################################################
##
## Date:    2024-12-20
## Author:  james.h.bisbee@vanderbilt.edu
## Purpose: This script generates SI Figure 2.
## Inputs:  /scratch/jhb362/zilinsky_2023/data/results/VIMP_lasso/LASSO_2024_outcome-.*_period-years_temp-lag12.RData
##            - Variable importance results generated by NFR_lasso_prep.R
##            - Summarized on the NYU HPC into PSRM_lasso_comb.RData via NFR_data_prep.R
## Outputs: ./figures/figS2.pdf
##
################################################################################

# Compute details
print(paste0('Compute environment from ',Sys.Date(),' run by Bisbee'))
if(Sys.info()['sysname'] == 'Windows') {
  ram_size = system("wmic MemoryChip get Capacity", intern = TRUE)[-1]
  model_name = system("wmic cpu get name", intern = TRUE)[2] # nocov
  vendor_id = system("wmic cpu get manufacturer", intern = TRUE)[2] # nocov
  
  print(list(ram = stringr::str_squish(ram_size)[1],
             vendor_id = stringr::str_squish(vendor_id),
             model_name = stringr::str_squish(model_name),
             no_of_cores = parallel::detectCores()))
} else if(Sys.info()['sysname'] == 'Linuxs') {
  splitted <- strsplit(system("ps -C rsession -o %cpu,%mem,pid,cmd", intern = TRUE), " ")
  df <- do.call(rbind, lapply(splitted[-1], 
                              function(x) data.frame(
                                cpu = as.numeric(x[2]),
                                mem = as.numeric(x[4]),
                                pid = as.numeric(x[5]),
                                cmd = paste(x[-c(1:5)], collapse = " "))))
  df
} else {
  cat("If not on Linux or Windows, you'll have to figure out your own solution to seeing the compute environment.")
}

sessionInfo()

load('./data/VIMP_lasso/PSRM_lasso_comb.RData')

lookup <- comb %>%
  count(vars) %>%
  mutate(cat = gsub('_.*','',vars)) %>%
  mutate(type = ifelse(cat %in% c('AGECAT','EDUC','GENDER','ANNUALINC','MARST','PARTY','RACE'),'Individual Predictors','Geographic Predictors')) %>%
  mutate(levels = str_to_title(gsub('Living wi','',gsub('^ur$','Unemp Rate',gsub('lfpr','LF Participation Rate',gsub('_',' ',gsub('(\\d+)_(\\d+)','\\1-\\2',gsub('^[A-Z]+_','',gsub('ARRST_','',gsub('imputed_|_imputed','',vars)))))))))) %>%
  mutate(catLabs = ifelse(cat == 'AGECAT','AGE',
                          ifelse(cat == 'ECON','LBR',cat))) %>%
  mutate(levels= paste0(catLabs,': ',levels))


toplot <- comb %>%
  group_by(vars,period,outcome,propIncl) %>%
  filter(normInd == min(normInd)) %>%
  ungroup() %>%
  left_join(lookup) %>%
  group_by(catLabs,outcome,type) %>%
  filter(lambda == max(lambda)) %>%
  ungroup() %>%
  mutate(propIncl = ifelse(is.na(propIncl),1,propIncl)) %>%
  group_by(outcome) %>%
  mutate(wdth = scales::rescale(lambda,to = c(.2,.85))) %>%
  group_by(outcome) %>%
  mutate(MIPT = lambda == max(lambda)) %>%
  mutate(outcome = ifelse(outcome == 'ENOUGHMON','ENF.MONEY',
                          ifelse(outcome == 'NATECONIMP','NTL.ECON',outcome)))


outcomeLookup <- data.frame(outcome = unique(toplot$outcome),
                            outLab = c('Happy with community','Cutting back on spending',
                                       'Economic conditions excellent','Enough money to do what you want',
                                       'Feel good about financial situation','Feel good about amount of money',
                                       'Experienced happiness yesterday','Hillary Clinton Favorability',
                                       'Life ladder: 5 yrs','Life ladder: today','Able to make major purchase right now',
                                       'More than enough money to do what you want','Worried about money in last week',
                                       'Worry spent too much yesterday','Economic conditions getting worse',
                                       'Obama approval','Experienced sadness yesterday',
                                       'Standard of living satisfaction','Standard of living satisfaction compared to others',
                                       'Experienced stress yesterday','Trump approval','VA governor favorability',
                                       'Watching spending closely','Experienced worry yesterday'))

toplot <- toplot %>%
  ungroup() %>%
  left_join(outcomeLookup)

pdf('./figures/figS2.pdf',width = 8,height = 6)
toplot %>%
  filter(!grepl('governor',outLab)) %>%
  mutate(outLab = factor(outLab,levels = (toplot %>%
                                            filter(catLabs == 'PARTY') %>%
                                            group_by(catLabs,outLab) %>%
                                            summarise(lambda = mean(lambda)) %>%
                                            arrange(lambda) %>% .$outLab))) %>%
  mutate(catLabs = factor(catLabs,levels = c('PARTY','AGE','ANNUALINC','MARST','EDUC','RACE','LBR',
                                             'DEM','GENDER','HLTH','CRIME'))) %>%
  mutate(wdth = scales::rescale(lambda,to = c(.2,.85))) %>%
  mutate(type = factor(type,levels = c('Individual Predictors','Geographic Predictors'))) %>%
  ggplot(aes(x = catLabs,color = MIPT,size = MIPT,
             y = outLab,fill = lambda,
             width = wdth,height = wdth)) + 
  geom_tile() + 
  theme_bw() + 
  scale_color_manual(guide = 'none',values = c('FALSE' = NA,'TRUE' = 'black')) +
  scale_size_manual(guide = 'none',values = c('FALSE' = NA,'TRUE' = 1)) +
  scale_fill_gradient(name = 'Lambda',low = 'grey90',high = 'darkorange') + 
  scale_alpha_manual(name = '95%',values = c(.3,1)) + 
  facet_grid(~type,scales = 'free',space = 'free') + 
  scale_x_discrete(position = 'top') + 
  labs(y = 'Outcome',
       x = NULL,
       title = 'Most Important Predictor Categories by Outcome: 2008-2018',
       subtitle = '- Colored by largest lambda penalty\n- Most important predictor for each outcome indicated in black') + 
  theme(#legend.position = 'bottom',
    axis.text.x = element_text(angle = 45,hjust = 0))
dev.off()

# EOF